# File Information --------------------------------------------------------

  # File Name: TeachingMethods.R				
  # Date: 10 June 2021								
  # Author: Clayton Webb		
  # Input File: MethodsSurveyCleanV1.csv
  # Version: R version 4.0.2 (2020-06-22) -- "Taking Off Again"

# Clear R -----------------------------------------------------------------
  # Remove every object that ls() lists in the current environment so the script
  # does not pick up leftovers from an earlier session.
  # NOTE(review): this only clears objects; attached packages, options() and the
  # working directory are left as they were, and ls() skips names that start
  # with a dot. It also wipes the workspace of anyone who source()s this file
  # into a session they are working in -- running in a fresh R session is safer.
  rm(list = ls())


# Packages ----------------------------------------------------------------

  # Install Packages
  #  install.packages(c("ggplot2","viridis","ggpubr","gridExtra","RColorBrewer","base2grob"))

  # Load Packages
    library(ggplot2)
    library(viridis)
    library(ggpubr)
    library(gridExtra)
    library(RColorBrewer)

# Load Data ---------------------------------------------------------------

  # Working Directory
  # The data folder defaults to the original author's Dropbox location. To run
  # the script on another machine, set the TEACHING_METHODS_DATA_DIR environment
  # variable to the folder that holds the survey file; nothing else has to change.
    data.dir <- Sys.getenv("TEACHING_METHODS_DATA_DIR", unset = NA)
    if (is.na(data.dir) || !nzchar(data.dir)) {
      data.dir <- "~/Dropbox/Jordan-Webb/IMC Teaching Methods/Data/2020-9-14 10.52 PM/"
    }
    setwd(data.dir)
  
  # Load Data
  # Read relative to the working directory set above.
    data <- read.csv("MethodsSurveyCleanV1.csv")
  
  # Variables  
  # List the column names (printed when the script is run interactively).
    names(data)
    
# Clean Data --------------------------------------------------------------

  # Keep everyone who made it to rank
  # (rows with a missing `rank` are dropped; everything below works on data.comp)
    data.comp <- data[!is.na(data[["rank"]]), ]

  # Code rank variable
  # Codes strictly between 0 and 5 collapse into one non-tenure-track group;
  # codes 5-8 are relabelled one by one, with 7 and 8 sharing the same label.
    data.comp$Rank <- data.comp$rank
    data.comp$Rank[which(data.comp$Rank > 0 & data.comp$Rank < 5)] <- "Non-Tenure-Track"

    tenure.rank.labels <- c("5" = "Tenure-Track Assistant",
                            "6" = "Tenured Associate",
                            "7" = "Tenured Full",
                            "8" = "Tenured Full")
    for (rank.code in names(tenure.rank.labels)) {
      data.comp$Rank[which(data.comp$Rank == rank.code)] <- tenure.rank.labels[[rank.code]]
    }

  # Treat code 8 on the six teaching-attitude items as missing
    for (teaching.item in c("ugteaching1", "ugteaching2", "ugteaching3",
                            "gteaching1", "gteaching2", "gteaching3")) {
      data.comp[[teaching.item]][which(data.comp[[teaching.item]] == 8)] <- NA
    }
    
  # Helper shared by the "valued" and "enjoy" recodes below.
  # Every score below `cutoff` gets `low.label`; each name in `exact.labels` is a
  # score that gets the label stored under it; anything else (including NA) is NA.
  # The result is a factor whose levels run from `low.label` through `exact.labels`.
    collapse.agreement <- function(score, cutoff, low.label, exact.labels) {
      grouped <- rep(NA_character_, length(score))
      grouped[which(score < cutoff)] <- low.label
      for (score.code in names(exact.labels)) {
        grouped[which(score == as.numeric(score.code))] <- exact.labels[[score.code]]
      }
      factor(grouped, levels = c(low.label, unname(exact.labels)))
    }

  # Teaching Valued (item 3 of the undergraduate / graduate teaching batteries)
    value.labels <- c("5" = "Somewhat Agree", "6" = "Agree", "7" = "Strongly Agree")
    data.comp$UGValue <- collapse.agreement(data.comp$ugteaching3, 5, "Any Disagree", value.labels)
    data.comp$GValue <- collapse.agreement(data.comp$gteaching3, 5, "Any Disagree", value.labels)

  # Enjoy Teaching (item 1 of the undergraduate / graduate teaching batteries)
    enjoy.labels <- c("6" = "Agree", "7" = "Strongly Agree")
    data.comp$UGEnjoy <- collapse.agreement(data.comp$ugteaching1, 6, "Somewhat Agree or Worse", enjoy.labels)
    data.comp$GEnjoy <- collapse.agreement(data.comp$gteaching1, 6, "Somewhat Agree or Worse", enjoy.labels)
    
  # Conflict and early-evaluation items
  # Undergraduate pair first, then the graduate pair: each gets a companion
  # "<item>.recode" column holding the raw code minus 6.
    for (shift.item in c("ugconflict", "earlyugevaluations",
                         "gconflict", "earlygevaluations")) {
      data.comp[[paste0(shift.item, ".recode")]] <- data.comp[[shift.item]] - 6
    }
    
  # Demographics  
    
  # Sex
  # Only codes 1, 2 and 4 are given a label. Both columns are built up from NA
  # rather than from a copy of `sex`, so that any other code in the data becomes
  # missing instead of leaking through as a raw number (which would otherwise put
  # values other than 0/1 into the `male` indicator and distort its summary).
    data.comp$sex.word <- rep(NA_character_, nrow(data.comp))
    data.comp$sex.word[which(data.comp$sex == 1)] <- "Male"
    data.comp$sex.word[which(data.comp$sex == 2)] <- "Female"
    data.comp$sex.word[which(data.comp$sex == 4)] <- "Non-binary"
  # Binary indicator: 1 = code 1 (male), 0 = code 2 (female), NA for everything else
    data.comp$male <- rep(NA_real_, nrow(data.comp))
    data.comp$male[which(data.comp$sex == 1)] <- 1
    data.comp$male[which(data.comp$sex == 2)] <- 0
    
    summary(data.comp$male)
    
  # Teach in the U.S.
  # Country code 187 is treated as the U.S.; a missing country stays missing in
  # both the label and the 0/1 version.
    teaches.in.us <- data.comp$teach.country == 187
    data.comp$teach.US <- ifelse(teaches.in.us, "US", "Non-US")
    data.comp$teach.US.num <- ifelse(teaches.in.us, 1, 0)
    
    summary(data.comp$teach.US.num)
    
  # Race
  # Race code "3" is labelled White; every other respondent is Non-White.
  # NOTE(review): a respondent whose `race` is NA is also coded "Non-White" / 0
  # here, because the default is assigned to everyone first -- confirm that
  # treating missing race as Non-White is intended.
    data.comp$white.lab <- "Non-White"
    data.comp$white.lab[which(data.comp$race == "3")] <- "White"
    
    data.comp$white.num <- 0
    data.comp$white.num[which(data.comp$race == "3")] <- 1
    
    summary(data.comp$white.num)
    
  # Rank
  # Two-line labels for plot axes, indexed by rank code 1-8. Codes 1-4 all share
  # the non-tenure-track label (earlier drafts labelled them separately as
  # graduate student, non-TT lecturer, adjunct and TT teaching lecturer).
    rank.short.labels <- c(rep("Non-\nTenure-Track", 4),
                           "Asst.\nProfessor", "Assoc.\nProfessor",
                           "Full\nProfessor", "Endowed\nProfessor")
    data.comp$rank.lab <- data.comp$rank
    for (rank.code in seq_along(rank.short.labels)) {
      data.comp$rank.lab[which(data.comp$rank.lab == rank.code)] <- rank.short.labels[rank.code]
    }

  # Rank spelled for descriptives
  # Same grouping, with "Assistant" and "Associate" written out in full.
    rank.full.labels <- c(rep("Non-\nTenure-Track", 4),
                          "Assistant\nProfessor", "Associate\nProfessor",
                          "Full\nProfessor", "Endowed\nProfessor")
    data.comp$rank.lab.full <- data.comp$rank
    for (rank.code in seq_along(rank.full.labels)) {
      data.comp$rank.lab.full[which(data.comp$rank.lab.full == rank.code)] <- rank.full.labels[rank.code]
    }

  # Respondents per rank group
    table(data.comp$rank.lab.full)
    
  # Teaching

  # Years at current school
    summary(data.comp[["yearscurrentschool"]])

  # Teach G and UG Methods
  # Each raw item gets two companions: "<item>bin" with code 2 turned into 0, and
  # "<item>word" with codes 2 / 1 spelled out as "No" / "Yes". Any other value is
  # carried over unchanged.

  # Undergraduate methods
    data.comp$teachugmethodsbin <- data.comp$teachugmethods
    data.comp$teachugmethodsword <- data.comp$teachugmethods
    data.comp$teachugmethodsbin <- replace(data.comp$teachugmethodsbin,
                                           which(data.comp$teachugmethodsbin == 2), 0)
    data.comp$teachugmethodsword <- replace(data.comp$teachugmethodsword,
                                            which(data.comp$teachugmethodsword == 2), "No")
    data.comp$teachugmethodsword <- replace(data.comp$teachugmethodsword,
                                            which(data.comp$teachugmethodsword == 1), "Yes")
    summary(data.comp$teachugmethodsbin)

  # Graduate methods
    data.comp$teachgmethodsbin <- data.comp$teachgmethods
    data.comp$teachgmethodsword <- data.comp$teachgmethods
    data.comp$teachgmethodsbin <- replace(data.comp$teachgmethodsbin,
                                          which(data.comp$teachgmethodsbin == 2), 0)
    data.comp$teachgmethodsword <- replace(data.comp$teachgmethodsword,
                                           which(data.comp$teachgmethodsword == 2), "No")
    data.comp$teachgmethodsword <- replace(data.comp$teachgmethodsword,
                                           which(data.comp$teachgmethodsword == 1), "Yes")
    summary(data.comp$teachgmethodsbin)
    
    # Turn both rank label columns into factors so tables and plots list the groups
    # in career order rather than alphabetically; labels not listed here become NA.
    rank.lab.order <- c("Non-\nTenure-Track", "Asst.\nProfessor", "Assoc.\nProfessor",
                        "Full\nProfessor", "Endowed\nProfessor")
    data.comp$rank.lab <- factor(data.comp$rank.lab, levels = rank.lab.order)

    rank.lab.full.order <- c("Non-\nTenure-Track", "Assistant\nProfessor", "Associate\nProfessor",
                             "Full\nProfessor", "Endowed\nProfessor")
    data.comp$rank.lab.full <- factor(data.comp$rank.lab.full, levels = rank.lab.full.order)
    
  # Inspect Data
  # Quick summaries of the recoded covariates (printed when run interactively).
    summary(data.comp[["male"]])                # 0/1 indicator built from sex
    summary(data.comp[["phd.year"]])
    summary(data.comp[["teach.US.num"]])        # 1 = country code 187
    summary(data.comp[["white.num"]])           # 1 = race code "3"
    summary(data.comp[["yearscurrentschool"]])
    summary(data.comp[["teachugmethodsbin"]])   # undergraduate methods item, 2 recoded to 0
    summary(data.comp[["teachgmethodsbin"]])    # graduate methods item, 2 recoded to 0
    

# Figure 1 ----------------------------------------------------------------
    
  # Stacked bar chart of `Frequency` by `Question`, shaded by `Categories`, with
  # the text in `Labels` centred inside each segment.
  # NOTE(review): `data.desc` is not created anywhere in the visible part of this
  # script -- it has to exist (with columns Question, Frequency, Categories and
  # Labels) before this section is run.
  # To write the figure to disk, uncomment pdf() and dev.off() TOGETHER. A
  # dev.off() without a device opened by this script either fails with "cannot
  # shut down device 1 (the null device)" or closes the plot that was just drawn.
  # pdf("FINAL_descrip_barchart.pdf")
  ggplot(data = data.desc, aes(x = Question, y = Frequency, fill = Categories, label = Labels)) + 
    geom_bar(stat = "identity") +
    scale_fill_grey(start = 0.5, end = 0.9) + # mid-to-light greys keep the labels readable
    theme_bw() +
    geom_text(size = 3, position = position_stack(vjust = 0.5)) +
    labs(y = "Percent of Sample", x = "Question") + 
    theme(legend.position = "none")
  # dev.off()
  

# Figure 2 ----------------------------------------------------------------

  # Subset those who teach graduate methods
  # which() keeps only rows where the answer is 1. Plain logical indexing would
  # also return one all-NA row for every respondent whose answer is missing.
    data.comp.g <- data.comp[which(data.comp$teachgmethods == 1), ]

  # Survey item behind each question (name = label used in the figure)
    g.items <- c("Material" = "earlygmaterial",
                 "Programming" = "earlygprograms",
                 "Writing" = "earlygwriting",
                 "Theory" = "earlygtheory",
                 "Measurement" = "earlygmeasure",
                 "Statistics" = "earlygstats",
                 "Data Analysis" = "earlygda")

  # First set: rank by responses
  # Long format: one row per respondent and question.
    data.g.plot.rank <- data.frame(Rank = rep(data.comp.g$Rank, length(g.items)),
                                   Question = rep(names(g.items), each = nrow(data.comp.g)),
                                   Response = unlist(data.comp.g[unname(g.items)], use.names = FALSE))

  # Counts for every Rank x Response x Question cell (NA answers are not counted)
    data.g.ggplot.rank.temp <- as.data.frame(table(Rank = data.g.plot.rank$Rank,
                                                   Response = data.g.plot.rank$Response,
                                                   Question = data.g.plot.rank$Question),
                                             responseName = "Total")

  # The same counts pooled over rank, to be shown as an extra "Overall" bar
    g.overall.counts <- lapply(g.items, function(item) table(data.comp.g[[item]]))
    data.g.total.glue <- data.frame(Rank = "Overall",
                                    Response = unlist(lapply(g.overall.counts, names), use.names = FALSE),
                                    Question = rep(names(g.items), times = lengths(g.overall.counts)),
                                    Total = unlist(lapply(g.overall.counts, as.vector), use.names = FALSE))

    data.g.ggplot.rank <- rbind(data.g.ggplot.rank.temp, data.g.total.glue)

  # Fix the bar order: rank groups in career order, "Overall" last
    data.g.ggplot.rank$Rank <- factor(data.g.ggplot.rank$Rank,
                                      levels = c("Non-Tenure-Track", "Tenure-Track Assistant", "Tenured Associate",
                                                 "Tenured Full", "Overall"))
    
    # `Response` is a factor here (it comes from data.frame(table(...))), so convert
    # through as.character() to get the recorded scores back. as.numeric() on the
    # factor itself returns level positions, which equal the scores only when every
    # score value occurs in the data -- otherwise the labels below land on the wrong
    # categories.
    data.g.ggplot.rank$Responses <- as.numeric(as.character(data.g.ggplot.rank$Response))
    # Spell out the anchored scale points for the legend; other scores keep their number.
    data.g.ggplot.rank$Responses[which(data.g.ggplot.rank$Responses == 1)] <- "1 (Too little emphasis)"
    data.g.ggplot.rank$Responses[which(data.g.ggplot.rank$Responses == 3)] <- "3 (Correct emphasis)"
    data.g.ggplot.rank$Responses[which(data.g.ggplot.rank$Responses == 5)] <- "5 (Too much emphasis)"
    
    
  # Create dimension data with and without the overall material question (for plotting)
  # Questions named in `exclude` are turned into NA by droplevels(); na.omit() then
  # removes those rows (together with any row that has an NA in another column).
    g.dimension.questions <- c("Programming", "Writing", "Theory",
                               "Measurement", "Statistics", "Data Analysis")

  # Only the overall "Material" question
    data.g.ggplot.rank.justmaterial <- data.g.ggplot.rank
    data.g.ggplot.rank.justmaterial[["Question"]] <- droplevels(data.g.ggplot.rank.justmaterial[["Question"]],
                                                                exclude = g.dimension.questions)
    data.g.ggplot.rank.justmaterial <- na.omit(data.g.ggplot.rank.justmaterial)

  # The six individual dimensions, without "Material"
    data.g.ggplot.rank.nomaterial <- data.g.ggplot.rank
    data.g.ggplot.rank.nomaterial[["Question"]] <- droplevels(data.g.ggplot.rank.nomaterial[["Question"]],
                                                              exclude = "Material")
    data.g.ggplot.rank.nomaterial <- na.omit(data.g.ggplot.rank.nomaterial)
    
  # Plot 
  # Bars are rescaled to proportions (position = "fill"): one bar per rank group
  # plus "Overall", one panel per question, shaded by response.
  # To write the figure to disk, uncomment pdf() and dev.off() TOGETHER. A
  # dev.off() without a device opened by this script either fails with "cannot
  # shut down device 1 (the null device)" or closes the plot that was just drawn.
    # pdf("/Users/scj0014/Myfiles/Dropbox/Jordan-Webb/IMC Teaching Methods/Data/2020-9-14 10.52 PM/FINAL_g_dimensions_withtotal.pdf")
    ggplot(data.g.ggplot.rank.nomaterial, aes(fill = Responses, y = Total, x = Rank)) +
      geom_col(position = "fill") + 
      scale_fill_grey(start = 0.0, end = 0.9) + # greyscale fill, black through light grey
      # scale_fill_viridis(discrete = TRUE, name = "") + 
      theme_bw() +
      facet_wrap(~ Question, ncol = 3) +
      scale_y_continuous(labels = scales::percent) +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
    # dev.off()
    

# Figure 3 ----------------------------------------------------------------

  # Subset those who teach undergraduate methods   
  # which() keeps only rows where the answer is 1. Plain logical indexing would
  # also return one all-NA row for every respondent whose answer is missing.
    data.comp.ug <- data.comp[which(data.comp$teachugmethods == 1), ]

  # Survey item behind each question (name = label used in the figure)
    ug.items <- c("Material" = "earlyugmaterial",
                  "Programming" = "earlyugprograms",
                  "Writing" = "earlyugwriting",
                  "Theory" = "earlyugtheory",
                  "Measurement" = "earlyugmeasure",
                  "Statistics" = "earlyugstats",
                  "Data Analysis" = "earlyugda")

  # First set: rank by responses
  # Long format: one row per respondent and question.
    data.ug.plot.rank <- data.frame(Rank = rep(data.comp.ug$Rank, length(ug.items)),
                                    Question = rep(names(ug.items), each = nrow(data.comp.ug)),
                                    Response = unlist(data.comp.ug[unname(ug.items)], use.names = FALSE))

  # Counts for every Rank x Response x Question cell (NA answers are not counted)
    data.ug.ggplot.rank.temp <- as.data.frame(table(Rank = data.ug.plot.rank$Rank,
                                                    Response = data.ug.plot.rank$Response,
                                                    Question = data.ug.plot.rank$Question),
                                              responseName = "Total")

  # The same counts pooled over rank, to be shown as an extra "Overall" bar
    ug.overall.counts <- lapply(ug.items, function(item) table(data.comp.ug[[item]]))
    data.ug.total.glue <- data.frame(Rank = "Overall",
                                     Response = unlist(lapply(ug.overall.counts, names), use.names = FALSE),
                                     Question = rep(names(ug.items), times = lengths(ug.overall.counts)),
                                     Total = unlist(lapply(ug.overall.counts, as.vector), use.names = FALSE))

    data.ug.ggplot.rank <- rbind(data.ug.ggplot.rank.temp, data.ug.total.glue)

  # Fix the bar order: rank groups in career order, "Overall" last
    data.ug.ggplot.rank$Rank <- factor(data.ug.ggplot.rank$Rank,
                                       levels = c("Non-Tenure-Track", "Tenure-Track Assistant", "Tenured Associate",
                                                  "Tenured Full", "Overall"))
    
    # `Response` is a factor here (it comes from data.frame(table(...))), so convert
    # through as.character() to get the recorded scores back. as.numeric() on the
    # factor itself returns level positions, which equal the scores only when every
    # score value occurs in the data -- otherwise the labels below land on the wrong
    # categories.
    data.ug.ggplot.rank$Responses <- as.numeric(as.character(data.ug.ggplot.rank$Response))
    # Spell out the anchored scale points for the legend; other scores keep their number.
    data.ug.ggplot.rank$Responses[which(data.ug.ggplot.rank$Responses == 1)] <- "1 (Too little emphasis)"
    data.ug.ggplot.rank$Responses[which(data.ug.ggplot.rank$Responses == 3)] <- "3 (Correct emphasis)"
    data.ug.ggplot.rank$Responses[which(data.ug.ggplot.rank$Responses == 5)] <- "5 (Too much emphasis)"
    
    
  # Create dimension data with and without the overall material question (for plotting)
  # Questions named in `exclude` are turned into NA by droplevels(); na.omit() then
  # removes those rows (together with any row that has an NA in another column).
    ug.dimension.questions <- c("Programming", "Writing", "Theory",
                                "Measurement", "Statistics", "Data Analysis")

  # Only the overall "Material" question
    data.ug.ggplot.rank.justmaterial <- data.ug.ggplot.rank
    data.ug.ggplot.rank.justmaterial[["Question"]] <- droplevels(data.ug.ggplot.rank.justmaterial[["Question"]],
                                                                 exclude = ug.dimension.questions)
    data.ug.ggplot.rank.justmaterial <- na.omit(data.ug.ggplot.rank.justmaterial)

  # The six individual dimensions, without "Material"
    data.ug.ggplot.rank.nomaterial <- data.ug.ggplot.rank
    data.ug.ggplot.rank.nomaterial[["Question"]] <- droplevels(data.ug.ggplot.rank.nomaterial[["Question"]],
                                                               exclude = "Material")
    data.ug.ggplot.rank.nomaterial <- na.omit(data.ug.ggplot.rank.nomaterial)
    
  # Plot  
  # Bars are rescaled to proportions (position = "fill"): one bar per rank group
  # plus "Overall", one panel per question, shaded by response.
  # To write the figure to disk, uncomment pdf() and dev.off() TOGETHER. A
  # dev.off() without a device opened by this script either fails with "cannot
  # shut down device 1 (the null device)" or closes the plot that was just drawn.
    # pdf("/Users/scj0014/Myfiles/Dropbox/Jordan-Webb/IMC Teaching Methods/Data/2020-9-14 10.52 PM/FINAL_ug_dimensions_withtotal.pdf")
    ggplot(data.ug.ggplot.rank.nomaterial, aes(fill = Responses, y = Total, x = Rank)) +
      geom_col(position = "fill") + 
      scale_fill_grey(start = 0.0, end = 0.9) + # greyscale fill, black through light grey
      # scale_fill_viridis(discrete = TRUE, name = "") + 
      theme_bw() +
      facet_wrap(~ Question, ncol = 3) +
      scale_y_continuous(labels = scales::percent) +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
    # dev.off()
    
    